library("rio"); library("sjmisc"); library("stringr"); library("dplyr")
# Load the survey panel and the fitted Wordfish model ---------
itanes <- rio::import(here::here("data", "processed", "panel.dta"))
# The .Rdata file is expected to define an object called `results_wf`.
load(here::here("risp-rr", "results_wordfish.Rdata"))
results <- results_wf
rm(results_wf)

# Read one component of the fitted model.
# The model components were previously accessed both as list elements (`$`)
# and as S4 slots (`@`); only one of the two can work for a given object.
# This accessor accepts either representation so that both tables below are
# built from `results` in the same way.
wf_field <- function(fit, name) {
  if (isS4(fit)) {
    methods::slot(fit, name)
  } else {
    fit[[name]]
  }
}

# Document-level estimates with the document id split on "_" into
# TV channel, date and edition (the fourth piece is discarded).
# NOTE(review): this table splits ids on "_" while `omega` below splits them
# on "."; confirm which format the document names actually follow.
doc_scores <- tibble(
  omega = wf_field(results, "theta"),
  omega_se = wf_field(results, "se.theta"),
  alpha = wf_field(results, "alpha"),
  id = wf_field(results, "docs")
) %>%
  tidyr::separate(
    col = id,
    into = c("TVchannel", "date", "edition", "dummy"),
    sep = "_",
    remove = FALSE
  ) %>%
  select(-dummy)

# Document positions used for the merge with the panel: one row per document
# with its theta, standard error, and the three "."-separated parts of the
# document name (news programme, date, hour).
omega <- data.frame(
  document = wf_field(results, "docs"),
  theta = wf_field(results, "theta"),
  se = wf_field(results, "se.theta"),
  stringsAsFactors = FALSE
)
# Split each name once; vapply() fails loudly if a piece is missing or is not
# a single string instead of silently returning a list.
doc_parts <- strsplit(omega$document, "\\.")
omega$tg <- vapply(doc_parts, `[[`, character(1), 1)
omega$date <- as.Date(vapply(doc_parts, `[[`, character(1), 2))
omega$hour <- vapply(doc_parts, `[[`, character(1), 3)

# Merge with TV news omega scores ----------------------------
#       - for each ID look at tg, then `interview_date`
#       - compute mean omega score in past 30 days

# Translate the numeric programme codes 1-7 into the programme names used in
# `omega$tg`; code 8 (Other/DK) becomes NA. Any other value is kept as its
# character representation.
tg_labels <- c("TG1", "TG2", "TG3", "TG4", "TG5", "StudioAperto", "TG7", NA)
tg_raw <- as.character(itanes$tg)
tg_pos <- match(tg_raw, as.character(seq_along(tg_labels)))
tg_known <- !is.na(tg_pos)
tg_raw[tg_known] <- tg_labels[tg_pos[tg_known]]
itanes$tg <- tg_raw

# Drop missing values.
# A logical mask is used on purpose: `-which(...)` would select zero rows
# (i.e. drop the whole panel) whenever no value is missing.
itanes <- itanes[!is.na(itanes$tg), ]

# Positive intervals (interviews occurring after news day):
difftime("2011-02-10", "2011-02-09")

# For each respondent, average theta over the documents of their news
# programme whose date lies 1 to 30 whole days before the interview date
# (same-day documents are excluded). Respondents with no matching document
# get NaN. A "." is printed per respondent as a progress indicator.
interview_dates <- itanes[["interview_date"]]
respondent_tg <- itanes[["tg"]]
itanes$omega.30days <- vapply(
  seq_len(nrow(itanes)),
  function(i) {
    cat(".")
    # Condition: news broadcast kept up to 30 days before interview
    day_gap <- as.numeric(
      difftime(interview_dates[i], omega$date, units = "days")
    )
    # which() discards NA comparisons, mirroring what subset() did.
    keep <- which(omega$tg == respondent_tg[i] & day_gap %in% 1:30)
    mean(omega$theta[keep], na.rm = TRUE)
  },
  numeric(1)
)

# Standardize variables -----------------------------------
# Each listed column is centred and scaled to unit variance; the result is
# stored next to it under the same name with a ".std" suffix.
std_inputs <- c("omega.30days", "lrdist_pdl", "pid_pdl", "lrdist_pd", "pid_pd")
itanes[paste0(std_inputs, ".std")] <- lapply(
  itanes[std_inputs],
  function(column) scale(column, center = TRUE, scale = TRUE)[, 1]
)

# Survey wave is stored as a factor:
itanes$wave <- factor(itanes$wave)

# Favourite TV news program becomes a factor too; it is tabulated before and
# after the conversion so the two outputs can be compared.
table(itanes$tg)
itanes$tg <- factor(itanes$tg)
table(itanes$tg)
levels(itanes$tg) # Berlusconi's Studio Aperto as baseline


# Optional restriction to the variables that are used (kept disabled):
# itanes.fit <- select(itanes, 
#                      id, wave, ptv_pdl, omega.30days, omega.30days_plac, omega.30days_plac2,
#                      tg, pid_pdl, gov, lr, lrdist_pdl, educ, polinfo, polinfo2)

# Write the merged panel to the processed-data folder of the project.
panel_out <- file.path(here::here(), "data", "processed", "panelWithOmega.Rdata")
save(itanes, file = panel_out)
